#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
rownames(datExpr) = datGenes$ensembl_gene_id
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('entrezgene'=rownames(.) %>% as.numeric) %>% 
             dplyr::rename('padj' = adj.P.Val, 'log2FoldChange' = logFC) %>%
             left_join(datGenes %>% dplyr::select(entrezgene, ensembl_gene_id) %>% 
                       dplyr::rename('ID' = ensembl_gene_id), by = 'entrezgene') %>% 
             left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             distinct(ID, .keep_all = TRUE) %>% left_join(GO_neuronal, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             left_join(clusterings, by='ID') %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`),
                    significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]

load('./../Data/GSEA.RData')
GSEA_SFARI = enrichment_SFARI

load('./../Data/ORA.RData')
ORA_SFARI = enrichment_SFARI


rm(DE_info, GO_annotations, clusterings, getinfo, mart, efit, enrichment_DGN, enrichment_DO, enrichment_GO,
   enrichment_KEGG, enrichment_Reactome, enrichment_SFARI)


Selecting Top Modules


We have results both from GSEA and ORA to measure the enrichment of SFARI Genes in each module, and they both agree with each other relatively well

SFARI_genes_by_module = c()

for(module in names(GSEA_SFARI)){
  
  GSEA_info = GSEA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, NES) %>%
              mutate(pvalue = ifelse(NES>0, pvalue, 1-pvalue), 
                     p.adjust = ifelse(NES>0, p.adjust, 1)) %>%
              dplyr::rename('GSEA_pval' = pvalue, 'GSEA_padj'= p.adjust)
  
  ORA_info = ORA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, qvalue, GeneRatio, Count) %>%
             dplyr::rename('ORA_pval' = pvalue, 'ORA_padj' = p.adjust)
  
  module_info = GSEA_info %>% full_join(ORA_info, by = 'ID') %>% add_column(.before = 'ID', Module = module)
  
  SFARI_genes_by_module = rbind(SFARI_genes_by_module, module_info)
}

SFARI_genes_by_module = SFARI_genes_by_module %>% 
                        left_join(dataset %>% dplyr::select(Module, MTcor) %>% 
                                  group_by(Module,MTcor) %>% tally %>% ungroup, by = 'Module') %>%
                        mutate(ORA_pval = ifelse(is.na(ORA_pval), 1, ORA_pval),
                               ORA_padj = ifelse(is.na(ORA_padj), 1, ORA_padj))

plot_data = SFARI_genes_by_module %>% filter(ID=='SFARI')

ggplotly(plot_data %>% ggplot(aes(1-GSEA_pval, 1-ORA_pval, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .6, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         xlab('GSEA Enrichment') + ylab('ORA Enrichment') + coord_fixed() +
         ggtitle(paste0('Corr = ', round(cor(plot_data$GSEA_pval, plot_data$ORA_pval),2))) +
         theme_minimal() + theme(legend.position = 'none'))



To determine which modules have a statistically significant enrichment in SFARI Genes we can use the adjusted p-values. We used the Bonferroni correction for this.

GSEA identifies 51/179 as significant. This doesn’t make sense

ggplotly(plot_data %>% ggplot(aes(GSEA_padj, ORA_padj, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dashed') +
         geom_vline(xintercept = 0.01, color = 'gray', linetype = 'dashed') +
         xlab('GSEA adjusted p-value') + ylab('ORA adjusted p-value') + 
         scale_x_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) + 
         scale_y_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) +
         ggtitle(paste0('Corr = ',round(cor(plot_data$GSEA_padj, plot_data$ORA_padj),2))) + coord_fixed() +
         theme_minimal() + theme(legend.position = 'none'))
plot_data = plot_data %>% mutate(GSEA_sig = GSEA_padj<0.01, ORA_sig = ORA_padj<0.01) %>%
            apply_labels(GSEA_sig = 'GSEA significant enrichment',
                         ORA_sig = 'ORA significant enrichment')

cro(plot_data$GSEA_sig, list(plot_data$ORA_sig, total()))
 ORA significant enrichment     #Total 
 FALSE   TRUE   
 GSEA significant enrichment 
   FALSE  127 1   128
   TRUE  48 3   51
   #Total cases  175 4   179



The ‘over-enrichment’ in SFARI Modules in GSEA could be because SFARI Genes have in general higher Module Memberships than the other genes, which would make them cluster at the beginning of the list constantly and would bias the enrichment analysis.

Looking at the plot below, we can see that there is not a uniform distribution of SFARI genes across all quantiles of the Module Membership values, but they instead seem to cluster around Module Membership values with high magnitudes (both positive and negative), so I don’t think the GSEA results for the SFARI genes are valid.

Because of this, I’m going to use the enrichment from the ORA to study the SFARI Genes

quant_data = dataset %>% dplyr::select(ID, contains('MM.')) %>% 
             left_join(genes_info %>% dplyr::select(ID, gene.score), by = 'ID') %>% dplyr::select(-ID) %>%
             melt %>% mutate(quant = cut(value, breaks = quantile(value, probs = seq(0,1,0.05)) %>% 
                                     as.vector, labels = FALSE)) %>%
             group_by(gene.score, quant) %>% tally %>% ungroup %>% ungroup
  
quant_data = quant_data %>% group_by(quant) %>% summarise(N = sum(n)) %>% ungroup %>% 
            left_join(quant_data, by = 'quant') %>% dplyr::select(quant, gene.score, n, N) %>% 
            mutate(p = round(100*n/N,2)) %>% filter(!is.na(quant)) %>%
            mutate(gene.score = factor(gene.score, levels = rev(c('1','2','3','Neuronal','Others'))))

ggplotly(quant_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>% 
         ggplot(aes(quant, p, fill = gene.score)) + geom_bar(stat='identity') + 
         xlab('Module Membership Quantiles') + ylab('% of SFARI Genes in Quantile') +
         ggtitle('Percentage of Genes labelled as SFARI in each Quantile') +
         scale_fill_manual(values = SFARI_colour_hue(r=rev(c(1:3)))) + 
         theme_minimal() + theme(legend.position = 'none'))
rm(quant_data)


Selecting modules with an adjusted p-value below 0.01 using the ORA

ggplotly(plot_data %>% ggplot(aes(MTcor, ORA_padj, size=n)) + 
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) +
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dotted') + 
         xlab('Module-Diagnosis Correlation') + ylab('Corrected p-values') + scale_y_log10() +
         ggtitle('Modules Significantly Enriched in SFARI Genes') +
         theme_minimal() + theme(legend.position = 'none'))
top_modules = plot_data %>% arrange(desc(ORA_padj)) %>% filter(ORA_padj<0.01) %>% pull(Module) %>% as.character

plot_data %>% filter(Module %in% top_modules) %>% arrange(ORA_pval) %>%
              dplyr::select(Module, MTcor, ORA_pval, ORA_padj, qvalue, GeneRatio, Count) %>%
              rename( ORA_pval = 'p-value', ORA_padj = 'Adjusted p-value') %>%
              kable %>% kable_styling(full_width = F)
Module MTcor p-value Adjusted p-value qvalue GeneRatio Count
#A68AFF 0.2417718 0.0000981 0.0004904 0.0001033 7/24 7
#00BBDC 0.3504563 0.0002836 0.0014180 0.0008956 7/28 7
#00BF76 -0.4033879 0.0007926 0.0039631 0.0016687 10/63 10
#26B700 -0.1792697 0.0016871 0.0050612 0.0008879 5/19 5
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% 
            left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.1, 1),
                   gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
            apply_labels(ImportantModules = 'Top Modules')

plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
              geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              ggtitle('Genes belonging to the Modules with the highest SFARI Genes Enrichment')

rm(pca)





Identifying representative genes for each Module


Following the WGCNA pipeline, selecting the genes with the highest Module Membership and Gene Significance

Top 20 genes for each module


Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(Relevance = (MM+abs(GS))/2) %>% arrange(by=-Relevance) %>% top_n(20) %>%
              dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #26B700 (MTcor = -0.18)
Gene Symbol MM GS SFARI Score Relevance
HLA-DRA 0.8747479 -0.3649290 Others 0.6198384
CD74 0.9157544 -0.1742287 Others 0.5449915
HLA-DRB1 0.8532709 0.0972571 3 0.4752640
HLA-DPA1 0.8156707 -0.1306145 Others 0.4731426
CIITA 0.6334165 -0.3008236 Others 0.4671200
HLA-DOA 0.8283354 -0.0942383 Others 0.4612868
HLA-DPB1 0.7835951 -0.1023942 3 0.4429947
FGL2 0.6498709 -0.2184730 Others 0.4341720
HAVCR2 0.6660515 -0.1266919 Others 0.3963717
GLTSCR1L 0.4917052 -0.1907723 Others 0.3412388
HLA-DQA1 0.4583689 -0.0686622 Others 0.2635156
CARTPT 0.4456341 0.0345264 Others 0.2400802
APOBEC3G 0.4263521 0.0301908 Others 0.2282714
YWHAQ 0.4042128 -0.0387991 Others 0.2215059
SLC27A2 0.3672742 0.0453783 Others 0.2063262
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00BF76 (MTcor = -0.4)
Gene Symbol MM GS SFARI Score Relevance
CHD7 0.8185871 -0.5338120 1 0.6761995
GPR116 0.6712372 -0.5646098 Others 0.6179235
DNAJC21 0.7593297 -0.4733788 Others 0.6163542
UIMC1 0.7666779 -0.4329504 3 0.5998142
SPECC1 0.8511586 -0.3400076 Others 0.5955831
PTK2 0.7045439 -0.4591038 Others 0.5818239
ZNF608 0.7629912 -0.3716821 Others 0.5673366
DSC1 0.6959402 -0.4344299 Others 0.5651850
PHF21A 0.7433904 -0.3700826 1 0.5567365
EHMT1 0.6989350 -0.4077613 1 0.5533482
TTLL5 0.7486423 -0.2941657 Others 0.5214040
TRUB2 0.4645929 -0.5391575 Others 0.5018752
RAD54B 0.6086840 -0.3338852 Others 0.4712846
NKIRAS1 0.4654338 -0.4735435 Others 0.4694886
MAP4 0.6561171 -0.2630500 Others 0.4595836
BCAN 0.5210378 -0.3559353 Others 0.4384866
CDK5RAP2 0.7024048 -0.1651121 Others 0.4337585
ING2 0.5735163 -0.2932299 Others 0.4333731
CCDC7 0.6639173 -0.1834586 Others 0.4236879
RNF220 0.5499203 -0.2943829 Others 0.4221516
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00BBDC (MTcor = 0.35)
Gene Symbol MM GS SFARI Score Relevance
MACF1 0.8659623 0.2645816 Others 0.5652719
NXPE2 0.6461758 0.4527530 Others 0.5494644
SETD5 0.7530549 0.3282670 1 0.5406610
KALRN 0.6547142 0.3550189 Others 0.5048666
DST 0.6684159 0.3320908 3 0.5002533
VAMP5 0.6552634 0.3415350 Others 0.4983992
NIN 0.8427420 0.1529265 Others 0.4978343
PHF3 0.4558986 0.5183742 1 0.4871364
SHC1 0.6521559 0.2736955 Others 0.4629257
PSG9 0.5825269 0.3258620 Others 0.4541944
ITPR2 0.5174786 0.3377800 Others 0.4276293
SYNE1 0.6390105 0.1993551 3 0.4191828
CUL7 0.5084846 0.3253475 2 0.4169160
CDK10 0.4796263 0.2977635 Others 0.3886949
DLG2 0.5819806 0.1746726 2 0.3783266
CHST9 0.5701735 -0.1747792 Others 0.3724764
LRRC37A3 0.6552248 0.0470024 Others 0.3511136
TRIO 0.6781371 -0.0019589 1 0.3400480
MARCH3 0.4647628 0.2058328 Others 0.3352978
PLEKHM1 0.4858289 -0.0370002 Others 0.2614146
rm(create_table, i)

The top genes tend to have a relatively high mean level of expression

pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              theme_minimal() + ggtitle('Most relevant genes for top Modules')

rm(ids, pca, tg, plot_data)




Enrichment Analysis


Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:

  • Gene Ontology

  • Disease Ontology

  • Disease Gene Network

  • KEGG

  • REACTOME

# GSEA
load('./../Data/GSEA.RData')

# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI


# ORA
load('./../Data/ORA.RData')

# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI

rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome)
compare_methods = function(GSEA_list, ORA_list){
  
  for(top_module in top_modules){
  
    cat(paste0('  \n  \nEnrichments for Module ', top_module, ' (MTcor=',
               round(dataset$MTcor[dataset$Module==top_module][1],2), '):  \n  \n'))
    
    GSEA = GSEA_list[[top_module]]
    ORA = ORA_list[[top_module]]
    
    cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms  \n'))
    cat(paste0('ORA has  ', nrow(ORA), ' enriched terms  \n'))
    cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods  \n  \n'))

    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% 
                           dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID') 
    
    if(nrow(plot_data)>0){
      print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>% 
                          arrange(pval_mean) %>% dplyr::select(-pval_mean) %>% 
            kable %>% kable_styling(full_width = F))
    }
  } 
}


plot_results = function(GSEA_list, ORA_list){
  
  l = htmltools::tagList()

  for(i in 1:length(top_modules)){
    
    GSEA = GSEA_list[[top_modules[i]]]
    ORA = ORA_list[[top_modules[i]]]
    
    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
    
    if(nrow(plot_data)>5){
      min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
      max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
      ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) + 
                     geom_point(aes(id = Description)) + 
                     geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
                     scale_x_continuous(limits = c(min_val, max_val)) + 
                     scale_y_continuous(limits = c(min_val, max_val)) + 
                     xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                     scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
      l[[i]] = ggp
    }
  }
  
  return(l)
}


KEGG

compare_methods(GSEA_KEGG, ORA_KEGG)

Enrichments for Module #26B700 (MTcor=-0.18):

GSEA has 24 enriched terms
ORA has 24 enriched terms
19 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa05152 Tuberculosis 2.202978 0.0048776 0.00e+00 8/11 0e+00
hsa05164 Influenza A 1.856789 0.0049026 0.00e+00 7/11 0e+00
hsa04514 Cell adhesion molecules 2.304926 0.0049519 1.10e-06 6/11 0e+00
hsa04145 Phagosome 2.370070 0.0049569 1.00e-06 6/11 0e+00
hsa04640 Hematopoietic cell lineage 2.363607 0.0051608 1.00e-07 6/11 0e+00
hsa05323 Rheumatoid arthritis 2.478602 0.0051624 1.00e-07 6/11 0e+00
hsa05140 Leishmaniasis 2.282038 0.0052551 0.00e+00 6/11 0e+00
hsa04612 Antigen processing and presentation 2.520062 0.0053259 0.00e+00 8/11 0e+00
hsa05150 Staphylococcus aureus infection 2.487320 0.0053259 0.00e+00 6/11 0e+00
hsa05321 Inflammatory bowel disease 2.240381 0.0053435 0.00e+00 6/11 0e+00
hsa05416 Viral myocarditis 2.416675 0.0053435 0.00e+00 6/11 0e+00
hsa04940 Type I diabetes mellitus 2.567063 0.0054584 0.00e+00 6/11 0e+00
hsa04672 Intestinal immune network for IgA production 2.368116 0.0054762 0.00e+00 6/11 0e+00
hsa05320 Autoimmune thyroid disease 2.197707 0.0054762 0.00e+00 6/11 0e+00
hsa05330 Allograft rejection 2.428696 0.0055213 0.00e+00 6/11 0e+00
hsa05332 Graft-versus-host disease 2.559866 0.0055360 0.00e+00 6/11 0e+00
hsa05310 Asthma 2.590835 0.0056600 0.00e+00 6/11 0e+00
hsa05169 Epstein-Barr virus infection 1.703062 0.0575858 6.60e-06 6/11 1e-07
hsa05166 Human T-cell leukemia virus 1 infection 1.671515 0.0807498 1.11e-05 6/11 2e-07

Enrichments for Module #00BF76 (MTcor=-0.4):

GSEA has 5 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00BBDC (MTcor=0.35):

GSEA has 4 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #A68AFF (MTcor=0.24):

GSEA has 25 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa04360 Axon guidance 1.875438 0.0090189 0.0012325 5/13 0.0011509


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_KEGG, ORA_KEGG)


Reactome

compare_methods(GSEA_Reactome, ORA_Reactome)

Enrichments for Module #26B700 (MTcor=-0.18):

GSEA has 24 enriched terms
ORA has 11 enriched terms
7 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-1280215 Cytokine Signaling in Immune system 1.781126 0.0162331 0.0003201 7/13 1.83e-05
R-HSA-913531 Interferon Signaling 1.890160 0.0190785 0.0000033 6/13 2.00e-07
R-HSA-877300 Interferon gamma signaling 2.321918 0.0202082 0.0000001 6/13 0.00e+00
R-HSA-389948 PD-1 signaling 2.265938 0.0223332 0.0000000 5/13 0.00e+00
R-HSA-202427 Phosphorylation of CD3 and TCR zeta chains 2.267208 0.0223668 0.0000000 5/13 0.00e+00
R-HSA-202430 Translocation of ZAP-70 to Immunological synapse 2.236699 0.0225682 0.0000000 5/13 0.00e+00
R-HSA-202433 Generation of second messenger molecules 2.124477 0.0652799 0.0000000 5/13 0.00e+00

Enrichments for Module #00BF76 (MTcor=-0.4):

GSEA has 50 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00BBDC (MTcor=0.35):

GSEA has 19 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #A68AFF (MTcor=0.24):

GSEA has 36 enriched terms
ORA has 7 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-422475 Axon guidance 1.716807 0.0154331 0.0185591 6/16 0.0022338


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_Reactome, ORA_Reactome)


Gene Ontology

compare_methods(GSEA_GO, ORA_GO)

Enrichments for Module #26B700 (MTcor=-0.18):

GSEA has 111 enriched terms
ORA has 86 enriched terms
40 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0002764 immune response-regulating signaling pathway 2.146861 0.0737188 0.0021103 6/14 0.0000725
GO:0002757 immune response-activating signal transduction 2.103415 0.0744176 0.0014164 6/14 0.0000519
GO:0002253 activation of immune response 1.998163 0.0730447 0.0030187 6/14 0.0000830
GO:0042110 T cell activation 2.031128 0.0753165 0.0008022 6/14 0.0000315
GO:0046649 lymphocyte activation 1.965198 0.0714323 0.0070359 6/14 0.0001612
GO:0050863 regulation of T cell activation 2.028704 0.0789124 0.0032545 5/14 0.0000852
GO:0019882 antigen processing and presentation 1.997324 0.0824630 0.0000000 8/14 0.0000000
GO:0002429 immune response-activating cell surface receptor signaling pathway 2.168264 0.0785318 0.0039998 5/14 0.0001000
GO:0050851 antigen receptor-mediated signaling pathway 2.200772 0.0823920 0.0005383 5/14 0.0000247
GO:0034341 response to interferon-gamma 2.108099 0.0836887 0.0000046 6/14 0.0000003
GO:0002768 immune response-regulating cell surface receptor signaling pathway 2.201816 0.0774725 0.0065760 5/14 0.0001572
GO:0048002 antigen processing and presentation of peptide antigen 2.012148 0.0842560 0.0000000 7/14 0.0000000
GO:0071346 cellular response to interferon-gamma 2.180626 0.0848338 0.0000023 6/14 0.0000001
GO:0050852 T cell receptor signaling pathway 2.148759 0.0848338 0.0001467 5/14 0.0000073
GO:0051249 regulation of lymphocyte activation 1.954367 0.0761214 0.0125035 5/14 0.0002751
GO:0060333 interferon-gamma-mediated signaling pathway 2.363246 0.0888672 0.0000001 6/14 0.0000000
GO:0002504 antigen processing and presentation of peptide or polysaccharide antigen via MHC class II 2.129769 0.0894793 0.0000000 7/14 0.0000000
GO:0002495 antigen processing and presentation of peptide antigen via MHC class II 2.147117 0.0895351 0.0000000 7/14 0.0000000
GO:0070665 positive regulation of leukocyte proliferation 2.123407 0.0867491 0.0028635 4/14 0.0000829
GO:0050671 positive regulation of lymphocyte proliferation 2.133298 0.0871329 0.0025323 4/14 0.0000806
GO:0032946 positive regulation of mononuclear cell proliferation 2.126081 0.0870648 0.0026393 4/14 0.0000806
GO:0050670 regulation of lymphocyte proliferation 2.086918 0.0830366 0.0148736 4/14 0.0002895
GO:0050870 positive regulation of T cell activation 2.246731 0.0830366 0.0148736 4/14 0.0002895
GO:0032944 regulation of mononuclear cell proliferation 2.080818 0.0829419 0.0152677 4/14 0.0002895
GO:0070663 regulation of leukocyte proliferation 2.059693 0.0824630 0.0182452 4/14 0.0003237
GO:1903039 positive regulation of leukocyte cell-cell adhesion 2.305850 0.0823920 0.0196439 4/14 0.0003376
GO:0002694 regulation of leukocyte activation 1.956432 0.0743770 0.0299637 5/14 0.0004994
GO:0050865 regulation of cell activation 1.898168 0.0736909 0.0415454 5/14 0.0005712
GO:0022409 positive regulation of cell-cell adhesion 2.198924 0.0810048 0.0352927 4/14 0.0005661
GO:0046651 lymphocyte proliferation 2.046325 0.0807602 0.0375531 4/14 0.0005661
GO:0032943 mononuclear cell proliferation 2.056501 0.0806743 0.0391177 4/14 0.0005661
GO:0051251 positive regulation of lymphocyte activation 2.131962 0.0805290 0.0440986 4/14 0.0005735
GO:0042102 positive regulation of T cell proliferation 2.128206 0.0899588 0.0390238 3/14 0.0005661
GO:0070661 leukocyte proliferation 2.020797 0.0800622 0.0504787 4/14 0.0006169
GO:0032649 regulation of interferon-gamma production 2.085140 0.0898802 0.0407956 3/14 0.0005712
GO:0032609 interferon-gamma production 2.112378 0.0894793 0.0504481 3/14 0.0006169
GO:1903037 regulation of leukocyte cell-cell adhesion 2.158327 0.0796368 0.0629503 4/14 0.0007526
GO:0002696 positive regulation of leukocyte activation 2.201813 0.0791574 0.0762325 4/14 0.0008920
GO:0050867 positive regulation of cell activation 2.201281 0.0786791 0.0899816 4/14 0.0010310
GO:0007159 leukocyte cell-cell adhesion 2.126880 0.0785318 0.0959563 4/14 0.0010770

Enrichments for Module #00BF76 (MTcor=-0.4):

GSEA has 21 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00BBDC (MTcor=0.35):

GSEA has 12 enriched terms
ORA has 5 enriched terms
0 terms are enriched in both methods

Enrichments for Module #A68AFF (MTcor=0.24):

GSEA has 51 enriched terms
ORA has 3 enriched terms
0 terms are enriched in both methods


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_GO, ORA_GO)


Disease Ontology

compare_methods(GSEA_DO, ORA_DO)

Enrichments for Module #26B700 (MTcor=-0.18):

GSEA has 30 enriched terms
ORA has 17 enriched terms
8 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
DOID:3118 hepatobiliary disease 1.800729 0.0098052 0.0001776 8/12 0.0000302
DOID:409 liver disease 1.833261 0.0098729 0.0001279 8/12 0.0000290
DOID:2237 hepatitis 2.036511 0.0103409 0.0000093 8/12 0.0000032
DOID:2043 hepatitis B 2.070166 0.0113925 0.0000000 8/12 0.0000000
DOID:2377 multiple sclerosis 2.033268 0.0115065 0.0010006 5/12 0.0001135
DOID:3213 demyelinating disease 2.026508 0.0114709 0.0012071 5/12 0.0001174
DOID:12361 Graves’ disease 2.182370 0.0122352 0.0699226 3/12 0.0033989
DOID:0060005 autoimmune disease of endocrine system 2.082962 0.0121867 0.0870863 3/12 0.0039149

Enrichments for Module #00BF76 (MTcor=-0.4):

GSEA has 0 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00BBDC (MTcor=0.35):

GSEA has 2 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #A68AFF (MTcor=0.24):

GSEA has 4 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_DO, ORA_DO)


Disease Gene Network

compare_methods(GSEA_DGN, ORA_DGN)

Enrichments for Module #26B700 (MTcor=-0.18):

GSEA has 58 enriched terms
ORA has 63 enriched terms
15 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
umls:C0014072 Experimental Autoimmune Encephalomyelitis 1.713611 0.0448744 0.0006127 6/14 0.0000165
umls:C0007570 Celiac Disease 1.798176 0.0455368 0.0000079 7/14 0.0000004
umls:C0036202 Sarcoidosis 1.906604 0.0473033 0.0000506 6/14 0.0000017
umls:C0038013 Ankylosing spondylitis 1.854257 0.0475532 0.0016618 5/14 0.0000395
umls:C0026896 Myasthenia Gravis 1.897274 0.0490831 0.0003814 5/14 0.0000110
umls:C3495559 Juvenile arthritis 1.745540 0.0455368 0.0083627 5/14 0.0001301
umls:C0242583 Bare Lymphocyte Syndrome 2.280483 0.0565284 0.0000000 6/14 0.0000000
umls:C2931418 Bare lymphocyte syndrome 2 2.280483 0.0565284 0.0000000 6/14 0.0000000
umls:C0005138 Berylliosis 2.278758 0.0576906 0.0000019 4/14 0.0000001
umls:C0020517 Hypersensitivity 2.202802 0.0499555 0.0078476 4/14 0.0001269
umls:C0376545 Hematologic Neoplasms 1.679383 0.0439095 0.0335246 5/14 0.0003307
umls:C0041327 Tuberculosis, Pulmonary 1.919842 0.0483386 0.0297397 4/14 0.0003084
umls:C0035455 Rhinitis 2.356729 0.0523883 0.0626667 3/14 0.0004822
umls:C0036421 Systemic Scleroderma 1.652695 0.0430287 0.0722192 5/14 0.0005284
umls:C0018133 Graft-vs-Host Disease 1.772540 0.0932989 0.0906218 4/14 0.0005817

Enrichments for Module #00BF76 (MTcor=-0.4):

GSEA has 1 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00BBDC (MTcor=0.35):

GSEA has 2 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #A68AFF (MTcor=0.24):

GSEA has 13 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_DGN, ORA_DGN)



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] WGCNA_1.69             fastcluster_1.1.25     dynamicTreeCut_1.63-1 
##  [4] org.Hs.eg.db_3.8.2     AnnotationDbi_1.46.1   IRanges_2.18.3        
##  [7] S4Vectors_0.22.1       Biobase_2.44.0         BiocGenerics_0.30.0   
## [10] DOSE_3.10.2            ReactomePA_1.28.0      clusterProfiler_3.12.0
## [13] biomaRt_2.40.5         kableExtra_1.1.0       knitr_1.28            
## [16] doParallel_1.0.15      iterators_1.0.12       foreach_1.5.0         
## [19] polycor_0.7-10         expss_0.10.2           ggpubr_0.2.5          
## [22] magrittr_1.5           GGally_1.5.0           gridExtra_2.3         
## [25] viridis_0.5.1          viridisLite_0.3.0      RColorBrewer_1.1-2    
## [28] dendextend_1.13.4      plotly_4.9.2           glue_1.4.1            
## [31] reshape2_1.4.4         forcats_0.5.0          stringr_1.4.0         
## [34] dplyr_1.0.0            purrr_0.3.4            readr_1.3.1           
## [37] tidyr_1.1.0            tibble_3.0.1           ggplot2_3.3.2         
## [40] tidyverse_1.3.0       
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.1.8       Hmisc_4.4-0          
##   [4] fastmatch_1.1-0       plyr_1.8.6            igraph_1.2.5         
##   [7] lazyeval_0.2.2        splines_3.6.3         crosstalk_1.1.0.1    
##  [10] BiocParallel_1.18.1   urltools_1.7.3        digest_0.6.25        
##  [13] htmltools_0.4.0       GOSemSim_2.10.0       GO.db_3.8.2          
##  [16] fansi_0.4.1           checkmate_2.0.0       memoise_1.1.0        
##  [19] cluster_2.1.0         graphlayouts_0.7.0    modelr_0.1.6         
##  [22] matrixStats_0.56.0    enrichplot_1.4.0      prettyunits_1.1.1    
##  [25] jpeg_0.1-8.1          colorspace_1.4-1      rappdirs_0.3.1       
##  [28] blob_1.2.1            rvest_0.3.5           ggrepel_0.8.2        
##  [31] haven_2.2.0           xfun_0.12             crayon_1.3.4         
##  [34] RCurl_1.98-1.2        jsonlite_1.7.0        graph_1.62.0         
##  [37] impute_1.58.0         survival_3.1-12       polyclip_1.10-0      
##  [40] gtable_0.3.0          webshot_0.5.2         UpSetR_1.4.0         
##  [43] graphite_1.30.0       scales_1.1.1          DBI_1.1.0            
##  [46] Rcpp_1.0.4.6          progress_1.2.2        htmlTable_1.13.3     
##  [49] gridGraphics_0.5-0    reactome.db_1.68.0    foreign_0.8-76       
##  [52] bit_1.1-15.2          europepmc_0.4         preprocessCore_1.46.0
##  [55] Formula_1.2-3         htmlwidgets_1.5.1     httr_1.4.1           
##  [58] fgsea_1.10.1          acepack_1.4.1         ellipsis_0.3.1       
##  [61] pkgconfig_2.0.3       reshape_0.8.8         XML_3.99-0.3         
##  [64] farver_2.0.3          nnet_7.3-14           dbplyr_1.4.2         
##  [67] labeling_0.3          ggplotify_0.0.5       tidyselect_1.1.0     
##  [70] rlang_0.4.6           munsell_0.5.0         cellranger_1.1.0     
##  [73] tools_3.6.3           cli_2.0.2             generics_0.0.2       
##  [76] RSQLite_2.2.0         ggridges_0.5.2        broom_0.5.5          
##  [79] evaluate_0.14         yaml_2.2.1            bit64_0.9-7          
##  [82] fs_1.4.0              tidygraph_1.2.0       ggraph_2.0.3         
##  [85] nlme_3.1-147          DO.db_2.9             xml2_1.2.5           
##  [88] compiler_3.6.3        rstudioapi_0.11       curl_4.3             
##  [91] png_0.1-7             ggsignif_0.6.0        reprex_0.3.0         
##  [94] tweenr_1.0.1          stringi_1.4.6         highr_0.8            
##  [97] lattice_0.20-41       Matrix_1.2-18         vctrs_0.3.1          
## [100] pillar_1.4.4          lifecycle_0.2.0       BiocManager_1.30.10  
## [103] triebeard_0.3.0       data.table_1.12.8     cowplot_1.0.0        
## [106] bitops_1.0-6          qvalue_2.16.0         latticeExtra_0.6-29  
## [109] R6_2.4.1              codetools_0.2-16      MASS_7.3-51.6        
## [112] assertthat_0.2.1      withr_2.2.0           mgcv_1.8-31          
## [115] hms_0.5.3             rpart_4.1-15          grid_3.6.3           
## [118] rvcheck_0.1.8         rmarkdown_2.1         ggforce_0.3.1        
## [121] base64enc_0.1-3       lubridate_1.7.4